Technotools

home *** CD-ROM | disk | FTP | other *** search

/ Technotools / Technotools (Chestnut CD-ROM)(1993).ISO / lang_oth / forchek1 / forlex.c < prev next >

Wrap

C/C++ Source or Header | 1991-11-06 | 44KB | 1,773 lines

/* forlex.c: Tokenizing routines for Fortran program checker. Copyright (C) 1991 by Robert K. Moniot. This program is free software. Permission is granted to modify it and/or redistribute it, retaining this notice. No guarantees accompany this software. Contains three previously independent modules: I. Forlex -- yylex function which gives tokens to the parser, and related functions. II. Advance -- bottom-level scanning of input stream. III. Keywords -- disambiguates keywords from identifiers. Scan ahead for the label I. II. or III. to find desired module. */ /* Declarations shared by all modules */ #include <stdio.h> #include <ctype.h> #include <string.h> #ifdef __STDC__ #include <stdlib.h> #else char *calloc(); #endif #include "forchek.h" #include "tokdefs.h" #include "symtab.h" /* lexdefs.h: Macros and shared info for lexical analysis routines */ #define EOL '\n' /* Character for end of line, not of statement */ extern YYSTYPE yylval; /* Lexical value for Yacc */ /* Since EOS is special, need special macros for it */ #define makeupper(C) (((C) != EOS && islower((int)(C)))? toupper((int)(C)):(C)) #define iswhitespace(C) ( (C) != EOS && isspace((int)(C)) ) #define isadigit(C) ( (C) != EOS && isdigit((int)(C)) ) #define isaletter(C) ( (C) != EOS && isalpha((int)(C)) ) /* define isidletter to allow underscore and/or dollar sign or not */ #if ALLOW_UNDERSCORES && ALLOW_DOLLARSIGNS /* both underscore and dollar sign */ #define isidletter(C) ( (C) != EOS && (isalpha((int)(C)) || \ (C) == '_' || (C) == '$') ) #else #if ALLOW_UNDERSCORES /* underscore and not dollar sign */ #define isidletter(C) ( (C) != EOS && (isalpha((int)(C))||(C) == '_') ) #else #if ALLOW_DOLLARSIGNS /* dollar sign and not underscore */ #define isidletter(C) ( (C) != EOS && (isalpha((int)(C))||(C) == '$') ) #else /* neither dollar sign nor underscore */ #define isidletter(C) isaletter(C) #endif #endif #endif int inside_quotes, /* TRUE when reading a string */ curr_char, /* Current input character */ next_char; /* Lookahead character */ extern int complex_const_allowed, /* shared flags operated by fortran.y */ inside_format, integer_context; extern int stmt_sequence_no; /* shared with fortran.y */ /* Declare shared lexical routines */ void advance(); int is_keyword(), looking_at(); /* I. Forlex Shared functions defined: yylex() Returns next token. Called from yyparse(). implied_id_token(t,s) Creates token for blank common declaration. Note: compilation options LEX_STORE_STRINGS and LEX_STORE_HOLLERITHS: Define the macro name LEX_STORE_STRINGS to build a version of forchek that stores string constants, and LEX_STORE_HOLLERITHS to store hollerith constants. Now that INCLUDE statements are supported, strings must be stored. Holleriths are not used, so they need not be stored. */ #define LEX_STORE_STRINGS #include <math.h> /* The following macro says whether a given character is legal, * i.e. one of the stream control chars or a valid ANSI Fortran * character. Lower case letters are considered legal too. * Nondigits in columns 1-6 (except EOF,EOS) are illegal */ #define islegal(C) ( ((C) == EOF) || ((C) == EOS) || \ ( (col_num >= 6 || isdigit(C)) && \ ((C) >= ' ' && (C) <= 'z' && legal_chars[(C)-' '] == (C))) ) /* Array has x where ASCII character is not valid */ PRIVATE char legal_chars[]= #ifdef ALLOW_UNDERSCORES " xxx$xx'()*+,-./0123456789:xx=xxx\ ABCDEFGHIJKLMNOPQRSTUVWXYZxxxx_xabcdefghijklmnopqrstuvwxyz"; #else " xxx$xx'()*+,-./0123456789:xx=xxx\ ABCDEFGHIJKLMNOPQRSTUVWXYZxxxxxxabcdefghijklmnopqrstuvwxyz"; #endif /* local functions defined */ PRIVATE void get_dotted_keyword(), get_hollerith(), get_identifier(), get_illegal_token(), get_label(), get_letter(), get_number(), get_punctuation(), get_string(), get_complex_const(); /* Gets next token for Yacc. Return value is token.class, * and a copy of the token is stored in yylval. */ int yylex() { Token token; /* Initialize token fields to scratch. */ token.subclass = 0; token.value.integer = 0; if(curr_char == EOF) { token.class = EOF; token.line_num = line_num; token.col_num = col_num; } else { /* Skip leading spaces, and give error message if non-ANSI * characters are found. */ while(iswhitespace(curr_char) || (! islegal(curr_char)) ) { if(!iswhitespace(curr_char)) yyerror("Illegal character"); advance(); } token.line_num = line_num; token.col_num = col_num; if(isadigit(curr_char)) { if(col_num < 6) get_label(&token); /* Stmt label */ else get_number(&token); /* Numeric or hollerith const */ } else if(isaletter(curr_char)) { if(implicit_letter_flag) get_letter(&token); /* letter in IMPLICIT list */ else get_identifier(&token); /* Identifier or keyword */ } else { switch(curr_char) { #ifdef ALLOW_UNDERSCORES case '_': get_identifier(&token); /* Identifier with initial _ */ break; #endif case '.': if(isadigit(next_char)) get_number(&token); /* Numeric const */ else if(isaletter(next_char)) get_dotted_keyword(&token); /* .EQ. etc. */ else { get_punctuation(&token); /* "." out of place */ } break; case '\'': get_string(&token); /* Quoted string */ break; default: get_punctuation(&token); /* Punctuation character */ break; } } } if(token.class == EOS) { implicit_flag=FALSE; /* in case of errors, reset flags */ implicit_letter_flag = FALSE; } prev_token_class = token.class; yylval = token; return token.class; } /* yylex */ /* Fills argument with token for an identifer, as if an identifer * with name given by string s had been lexed. This will * be called by parser when blank common declaration is seen, * and when a main prog without program statement is found * so processing of named and unnamed cases can be handled uniformly. */ void implied_id_token(t,s) Token *t; char *s; { int h; unsigned long hnum; hnum = hash(s); while( h=hnum%HASHSZ, hashtab[h].name != NULL && strcmp(hashtab[h].name,s) != 0) hnum = rehash(hnum); if(hashtab[h].name == NULL) { /* not seen before */ hashtab[h].name = s; hashtab[h].loc_symtab = NULL; hashtab[h].glob_symtab = NULL; hashtab[h].com_loc_symtab = NULL; hashtab[h].com_glob_symtab = NULL; } t->class = tok_identifier; t->value.integer = h; } /* implied_id_token */ struct { char *name; int class,subclass; } dotted_keywords[]={ {"FALSE",tok_logical_const,FALSE}, {"TRUE",tok_logical_const,TRUE}, {"EQ",tok_relop,relop_EQ}, {"NE",tok_relop,relop_NE}, {"LE",tok_relop,relop_LE}, {"LT",tok_relop,relop_LT}, {"GE",tok_relop,relop_GE}, {"GT",tok_relop,relop_GT}, {"AND",tok_AND,0}, {"OR",tok_OR,0}, {"EQV",tok_EQV,0}, {"NEQV",tok_NEQV,0}, {"NOT",tok_NOT,0}, {NULL,0,0} }; PRIVATE void get_dotted_keyword(token) Token *token; { char s[8]; int i=0; initial_flag = FALSE; advance(); /* gobble the initial '.' */ while(isaletter(curr_char)) { if(i < 7) s[i++] = makeupper(curr_char); advance(); } s[i] = '\0'; if(curr_char != '.') { yyerror("Badly formed logical/relational operator or constant"); } else { advance(); /* gobble the final '.' */ } for(i=0; dotted_keywords[i].name != NULL; i++) { if(strcmp(s,dotted_keywords[i].name) == 0) { token->class = dotted_keywords[i].class; token->subclass = dotted_keywords[i].subclass; token->value.string = dotted_keywords[i].name; if(debug_lexer) fprintf(list_fd,"\nDotted keyword:\t\t%s", dotted_keywords[i].name); return; } } /* Match not found: signal an error */ yyerror("Unknown logical/relational operator or constant"); get_illegal_token(token); } /* get_dotted_keyword */ PRIVATE void get_hollerith(token,n) /* Gets string of form nHaaaa */ Token *token; int n; { int i; /* Holl. consts are not stored unless the macro name LEX_STORE_HOLLERITHS is defined. */ #ifdef LEX_STORE_HOLLERITHS char *s; #else char *s = "Not stored"; #endif initial_flag = FALSE; inside_quotes = TRUE; #ifdef LEX_STORE_HOLLERITHS if( (s=(char *)calloc((unsigned)(n+1),1)) == (char *)NULL ) { fprintf(stderr,"Out of string space at line %u\n",line_num); for(i=0; i<n; i++) advance(); } else #endif { for(i=0; i<n; i++) { while(curr_char == EOL) /* Skip over end of line */ advance(); if(curr_char == EOS || curr_char == EOF) { n = i; /* Premature end of string */ } else { #ifdef LEX_STORE_HOLLERITHS s[i] = curr_char; #endif advance(); } } #ifdef LEX_STORE_HOLLERITHS s[n] = '\0'; #endif } inside_quotes = FALSE; token->class = tok_hollerith; token->value.string = s; if(debug_lexer) fprintf(list_fd,"\nHollerith:\t\t%s",s); } /* get_hollerith */ PRIVATE void get_identifier(token) Token *token; { char s[MAXIDSIZE+1]; int i=0; /* This loop gets letter [letter|digit]* forms */ while(isidletter(curr_char) || isadigit(curr_char)) { if(i < MAXIDSIZE) s[i++] = makeupper(curr_char); advance(); } /* If followed by .number then it may be a FORMAT edit descriptor. */ if(inside_format && curr_char == '.' && isadigit(next_char) ) { if(i < MAXIDSIZE) s[i++] = curr_char; /* store the '.' */ advance(); while( isadigit(curr_char) ) { if(i < MAXIDSIZE) s[i++] = curr_char; advance(); } token->class = tok_edit_descriptor; } else token->class = tok_identifier; s[i++] = '\0'; if(token->class == tok_edit_descriptor) { token->value.string = NULL; } else { /* it is an identifier or keyword */ int keywd_class; if( (keywd_class = is_keyword(s)) != 0) { token->class = keywd_class; /* It's a keyword */ token->value.string = NULL; } else { /* Identifier: find its hashtable entry or create a new entry. */ token->value.integer = hash_lookup(s); } } if(debug_lexer){ switch(token->class) { case tok_edit_descriptor: fprintf(list_fd,"\nEdit descriptor:\t%s",s); break; case tok_identifier: fprintf(list_fd,"\nIdentifier:\t\t%s",s); break; default: fprintf(list_fd,"\nKeyword:\t\ttok_%s",s); break; } } } /* get_identifier */ PRIVATE void get_illegal_token(token) /* Handle an illegal input situation */ Token *token; { token->class = tok_illegal; if(debug_lexer) fprintf(list_fd,"\nILLEGAL TOKEN"); } /* get_illegal_token */ /* Read a label from label field. */ PRIVATE void get_label(token) Token *token; { int value=0; while( isadigit(curr_char) && col_num < 6 ) { value = value*10 + (curr_char-'0'); advance(); } token->class = tok_label; token->subclass = value; if(debug_lexer) fprintf(list_fd,"\nLabel:\t\t\t%d",value); } /* get_label */ PRIVATE void get_letter(token) /* Gets letter in IMPLICIT list */ Token *token; { token->class = tok_letter; token->subclass = makeupper(curr_char); if(debug_lexer) fprintf(list_fd,"\nLetter:\t\t\t%c",token->subclass); advance(); } /* get_letter */ /* get_number reads a number and determines data type: integer, * real, or double precision. */ #ifdef BLANKS_IN_NUMBERS /* tolerate blanks within numbers */ #define SKIP_SP while(iswhitespace(curr_char)) advance() #else #define SKIP_SP #endif PRIVATE void get_number(token) Token *token; { double dvalue,leftside,rightside,pwr_of_ten; int exponent,expsign,datatype,c,digit_seen=FALSE; initial_flag = FALSE; leftside = 0.0; datatype = tok_integer_const; while(isadigit(curr_char)) { leftside = leftside*10.0 + (double)(curr_char-'0'); advance(); SKIP_SP; digit_seen = TRUE; } /* If context specifies integer expected, skip to end. Otherwise scan on ahead for more. */ if( integer_context) { if(!digit_seen) { yyerror("integer expected"); advance(); /* gobble something to avoid infinite loop */ } } else {/* not integer_context */ if( makeupper(curr_char) == 'H' ){ /* nnH means hollerith */ advance(); if(leftside == 0.0) { yyerror("Zero-length hollerith constant"); get_illegal_token(token); } else { get_hollerith(token, (int)leftside); } return; } rightside = 0.0; pwr_of_ten = 1.0; if( curr_char == '.' && ! looking_at(tok_relop) ) { /* don't be fooled by 1.eq.N */ datatype = tok_real_const; advance(); SKIP_SP; while(isadigit(curr_char)) { rightside = rightside*10.0 + (double)(curr_char-'0'); pwr_of_ten *= 0.10; advance(); SKIP_SP; } } if(debug_lexer) dvalue = leftside + rightside*pwr_of_ten; exponent = 0; expsign = 1; #if 0/* old version */ /* If we now see E or D, it is a real/d.p. constant, unless the E or D is followed by w.d which gives an edit descr */ if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' ) && !( datatype==tok_integer_const && looking_at(tok_edit_descriptor))) #else/* new version */ /* Integer followed by E or D gives a real/d.p constant unless we are inside a format statement, in which case we have an edit descriptor. */ if( ( (c = makeupper(curr_char)) == 'E' || c == 'D' ) && !( datatype==tok_integer_const && inside_format) ) #endif { datatype = ((c == 'E')? tok_real_const: tok_dp_const); advance(); if(curr_char == '+') { expsign = 1; advance(); } else if(curr_char == '-') { expsign = -1; advance(); } if(!isadigit(curr_char)) { yyerror("Badly formed real constant"); } else while(isadigit(curr_char)) { exponent = exponent*10 + (curr_char-'0'); advance(); } /* Compute real value only if debugging. If it exceeds max magnitude, computing it may cause crash. At this time, value of real const is not used for anything. */ if(debug_lexer) dvalue *= pow(10.0, (double)(exponent*expsign)); else dvalue = 0.0; } }/* end if(!integer_context) */ token->class = datatype; switch(datatype) { case tok_integer_const: token->value.integer = (int)leftside; if(debug_lexer) fprintf(list_fd,"\nInteger const:\t\t%d",token->value.integer); break; case tok_real_const: /* store single as double lest it overflow */ token->value.dbl = dvalue; if(debug_lexer) fprintf(list_fd,"\nReal const:\t\t%g",token->value.dbl); break; case tok_dp_const: token->value.dbl = dvalue; if(debug_lexer) fprintf(list_fd,"\nDouble const:\t\t%lg",token->value.dbl); break; } } /* get_number */ /* get_complex_constant reads an entity of the form (num,num) where num is any [signed] numeric constant. It will only be called when looking_at() has guaranteed that there is one there. The token receives the real part as a number. The imaginary part is not stored. Whitespace is allowed between ( and num, around the comma, and between num and ) but not within num. */ PRIVATE void get_complex_const(token) Token *token; { Token imag_part; /* temporary to hold imag part */ double sign=1.0; initial_flag = FALSE; advance(); /* skip over the initial paren */ while(iswhitespace(curr_char)) advance(); if(curr_char == '+' || curr_char == '-') { if(curr_char == '-') sign = -1.0; advance(); SKIP_SP; } if(debug_lexer){ fprintf(list_fd,"\nComplex const:("); if(sign < 0.0) fprintf(list_fd," -"); } get_number(token); switch(token->class) { case tok_integer_const: token->value.dbl = sign*(double)token->value.integer; break; case tok_real_const: case tok_dp_const: token->value.dbl = sign*token->value.dbl; break; } token->class = tok_complex_const; while(iswhitespace(curr_char)) advance(); advance(); /* skip over the comma */ while(iswhitespace(curr_char)) advance(); if(curr_char == '+' || curr_char == '-') { if(curr_char == '-') sign = -1.0; advance(); SKIP_SP; } if(debug_lexer){ fprintf(list_fd,"\n,"); if(sign < 0.0) fprintf(list_fd," -"); } get_number(&imag_part); while(iswhitespace(curr_char)) advance(); advance(); /* skip over final paren */ if(debug_lexer) fprintf(list_fd,"\n)"); } PRIVATE void get_punctuation(token) Token *token; { initial_flag = FALSE; if(curr_char == '*' && next_char == '*') { token->class = tok_power; advance(); } else if(curr_char == '/' && next_char == '/' ) { token->class = tok_concat; advance(); } /* paren can be the start of complex constant if everything is just right. Maybe more tests needed here. */ else if(complex_const_allowed && curr_char == '(' && prev_token_class != tok_identifier && looking_at(tok_complex_const)) { get_complex_const(token); return; } else token->class = curr_char; if(debug_lexer) { if(token->class == EOS) fprintf(list_fd,"\n\t\t\tEOS"); else if(token->class == tok_power) fprintf(list_fd,"\nPunctuation:\t\t**"); else if(token->class == tok_concat) fprintf(list_fd,"\nPunctuation:\t\t//"); else fprintf(list_fd,"\nPunctuation:\t\t%c",token->class); } advance(); } /* get_punctuation */ PRIVATE void get_string(token) /* Gets string of form 'aaaa' */ Token *token; { int i,len; /* String consts are not stored unless the macro name LEX_STORE_STRINGS is defined. */ #ifdef LEX_STORE_STRINGS char *s; char tmpstr[MAXSTR+1]; #else char *s = "Not stored"; #endif initial_flag = FALSE; inside_quotes = TRUE; advance(); /* Gobble leading quote */ i = len = 0; for(;;) { while(curr_char == EOL) advance(); if(curr_char == EOS || curr_char == EOF) { yyerror("Closing quote missing from string"); break; } if(curr_char == '\'') { do { /* Handle possible embedded EOL */ advance(); } while(curr_char == EOL); if(curr_char == '\'') { /* '' becomes ' in string */ #ifdef LEX_STORE_STRINGS if(i < MAXSTR) tmpstr[i++] = curr_char; #endif ++len; advance(); } else { break; /* It was a closing quote after all */ } } else { #ifdef LEX_STORE_STRINGS if(i < MAXSTR) tmpstr[i++] = curr_char; #endif ++len; advance(); } } #ifdef LEX_STORE_STRINGS tmpstr[i++] = '\0'; if( (s=(char *)calloc((unsigned)i,1)) == (char *)NULL ) { fprintf(stderr,"Out of space at line %u\n",line_num); } else { (void) strcpy(s,tmpstr); } #endif if(len == 0) { warning(line_num,col_num, "Zero-length string not allowed\n"); } inside_quotes = FALSE; token->class = tok_string; token->value.string = s; if(debug_lexer) fprintf(list_fd,"\nString:\t\t\t%s",s); } /* get_string */ /* End of Forlex module */ /* II. Advance */ /* advance.c: Low-level input routines for Fortran program checker. Shared functions defined: init_scan() Initializes an input stream. finish_scan() Finishes processing an input stream. advance() Reads next char, removing comments and handling continuation lines. looking_at() Handles lookahead up to end of line. flush_line_out(n) Prints lines up to line n if not already printed, so error messages come out looking OK. */ /* Define tab stops: nxttab[col_num] is column of next tab stop */ #define do8(X) X,X,X,X,X,X,X,X PRIVATE int nxttab[]={ 0, do8(9), do8(17), do8(25), do8(33), do8(41), do8(49), do8(57), do8(65), do8(73), do8(81)}; PRIVATE int next_index, /* Index in line of next_char */ prev_comment_line, /* True if previous line was comment */ curr_comment_line, /* True if current line is comment */ noncomment_line_count, /* Number of noncomment lines read so far */ line_is_printed, /* True if line has been flushed (printed) */ prev_line_is_printed, /* True if line has been flushed (printed) */ sticky_EOF; /* Signal to delay EOF a bit for sake of error messages in include files. */ PRIVATE unsigned prev_line_num; /* line number of previous input line */ unsigned prev_stmt_line_num; /* line number of previous noncomment */ PRIVATE char lineA[MAXLINE+1],lineB[MAXLINE+1], /* Buffers holding input lines */ *prev_line,*line; /* Pointers to input buffers */ PRIVATE int is_comment(), is_continuation(), is_overlength(), see_a_number(); PRIVATE char *getstrn(); #ifdef ALLOW_INCLUDE /* Definition of structure for saving the input stream parameters while processing an include file. */ typedef struct { FILE *input_fd; char *fname; char line[MAXLINE]; /* MAXLINE is defined in forchek.h */ int curr_char; int next_char; int next_index; int col_num; int next_col_num; int line_is_printed; int do_list; unsigned line_num; unsigned next_line_num; } IncludeFileStack; PRIVATE IncludeFileStack include_stack[MAX_INCLUDE_DEPTH]; #endif /*ALLOW_INCLUDE*/ PRIVATE void init_stream(); PRIVATE int push_include_file(),pop_include_file(); #ifdef ALLOW_INCLUDE /* defns of include-file handlers */ PRIVATE int push_include_file(fname,fd) char *fname; FILE *fd; { if (incdepth == MAX_INCLUDE_DEPTH) { yyerror("Oops! include files nested too deep"); return FALSE; } if(debug_latest){ fprintf(list_fd,"\npush_include_file: curr_char=%c (%d)",curr_char,curr_char); } include_stack[incdepth].input_fd = input_fd; input_fd = fd; include_stack[incdepth].fname = current_filename; current_filename = fname; strcpy(include_stack[incdepth].line,line); include_stack[incdepth].curr_char = curr_char; include_stack[incdepth].next_char = next_char; include_stack[incdepth].next_index = next_index; include_stack[incdepth].col_num = col_num; include_stack[incdepth].next_col_num = next_col_num; include_stack[incdepth].line_is_printed = line_is_printed; include_stack[incdepth].line_num = line_num; include_stack[incdepth].next_line_num = next_line_num; include_stack[incdepth].do_list = do_list; incdepth++; init_stream(); return TRUE; } PRIVATE int pop_include_file() { if(debug_latest){ fprintf(list_fd,"\npop_include_file: line %u = %s depth %d",line_num,line, incdepth); } if (incdepth == 0) { /* Stack empty: no include file to pop. */ return FALSE; } incdepth--; if(do_list) { flush_line_out(next_line_num); fprintf(list_fd,"\nResuming file %s:", include_stack[incdepth].fname); } fclose(input_fd); input_fd = include_stack[incdepth].input_fd; current_filename = include_stack[incdepth].fname; strcpy(line,include_stack[incdepth].line); curr_char = include_stack[incdepth].curr_char; next_char = include_stack[incdepth].next_char; next_index = include_stack[incdepth].next_index; col_num = include_stack[incdepth].col_num; next_col_num = include_stack[incdepth].next_col_num; line_is_printed = include_stack[incdepth].line_is_printed; line_num = include_stack[incdepth].line_num; next_line_num = include_stack[incdepth].next_line_num; do_list = include_stack[incdepth].do_list; curr_comment_line = FALSE; prev_line_is_printed = TRUE; initial_flag = TRUE; sticky_EOF = TRUE; return TRUE; } void open_include_file(fname) char *fname; { FILE *fd; #ifdef VMS_INCLUDE int list_option=FALSE; /* /[NO]LIST qualifier: default=NOLIST */ #endif /*VMS_INCLUDE*/ #ifdef VMS_INCLUDE /* for VMS: default extension is .for */ if(has_extension(fname,"/nolist")) { list_option = FALSE; fname[strlen(fname)-strlen("/nolist")] = '\0'; /* trim off qualifier */ } else if(has_extension(fname,"/list")) { list_option = TRUE; fname[strlen(fname)-strlen("/list")] = '\0'; /* trim off qualifier */ } fname = add_ext(fname, DEF_SRC_EXTENSION); #endif if ((fd = fopen(fname,"r")) == NULL) { fprintf(stderr,"\nerror opening include file %s\n",fname); return; } /* Print the INCLUDE line if do_list */ if(do_list) flush_line_out(prev_line_num); /* Report inclusion of file */ if(verbose || do_list) fprintf(list_fd,"\nIncluding file %s:",fname); /* Save the current input stream and then open the include file as input stream. */ if( push_include_file(fname,fd) ) { #ifdef VMS_INCLUDE /* put /[NO]LIST option into effect */ if(do_list != list_option) fprintf(list_fd," (listing %s)", list_option? "on":"off"); do_list = list_option; #endif /*VMS_INCLUDE*/ } else fclose(fd); } #else /* no ALLOW_INCLUDE */ /* disabled forms of include handlers */ PRIVATE int push_include_file(fname,fd) char *fname; FILE *fd; {return FALSE;} PRIVATE int pop_include_file() {return FALSE;} void open_include_file(fname) char *fname; {} #endif /*ALLOW_INCLUDE*/ void init_scan() /* Starts reading a file */ { tab_count = 0; incdepth = 0; line = lineA; /* Start out reading into buffer A */ prev_line = lineB; init_stream(); } PRIVATE void init_stream() /* Initializes a new input stream */ { curr_comment_line = FALSE; inside_quotes = FALSE; line_is_printed = TRUE; prev_line_is_printed = TRUE; noncomment_line_count = 0; next_index = -1; /* Startup as if just read a blank line */ next_char = EOS; curr_char = EOS; next_col_num = 0; next_line_num = 0; prev_line_num = prev_stmt_line_num = 0; sticky_EOF = TRUE; line[0] = '\0'; advance(); /* put 1st two chars in the pipeline */ advance(); advance(); /* gobble the artificial initial EOS */ } void finish_scan() { /* clean up if no END statement at EOF */ check_seq_header((Token *)NULL); /* print last line if not already done */ if(do_list) flush_line_out(line_num); } #ifdef INLINE_COMMENT_CHAR /* macro is used on next_char: must test curr_char to avoid being fooled by '!' without messing up on 'xxx'! either. Note that inside_quotes does not yet reflect curr_char. */ #define inline_comment(c) ((inside_quotes == (curr_char == '\'')) && \ (c)==INLINE_COMMENT_CHAR) #endif void advance() { int eol_skip = FALSE; do{ while(next_char == EOF) { /* Stick at EOF */ if(curr_char == EOS || curr_char == EOF) { /* Pause to allow parse actions at end of stmt to have correct file context before popping the include file. Effect is to send an extra EOS to parser at end of file. */ if(sticky_EOF) { sticky_EOF = FALSE; return; } /* At EOF: close include file if any, otherwise yield an EOF character. */ if( ! pop_include_file() ) { curr_char = EOF; return; } } else { curr_char = EOS; return; } } if(curr_char == EOS) initial_flag = TRUE; if(! eol_skip) { curr_char = next_char; /* Step to next char of input */ col_num = next_col_num; line_num = next_line_num; } if(next_char == '\t'){ /* Handle tabs in input */ next_col_num = nxttab[next_col_num]; if( ! inside_quotes ) tab_count++; /* for portability warning */ } else { next_col_num++; } next_char = line[++next_index]; /* If end of line is reached, input a new line. */ while(next_col_num > max_stmt_col || next_char == '\0' #ifdef INLINE_COMMENT_CHAR || inline_comment(next_char) #endif ){ do{ if(do_list) /* print prev line if not printed yet */ flush_line_out(prev_line_num); if( f77_standard ) { if( !prev_comment_line && is_overlength(prev_line)){ nonstandard(prev_line_num,73); msg_tail(": characters past 72 columns"); } #ifdef INLINE_COMMENT_CHAR if( !curr_comment_line && inline_comment(next_char)){ nonstandard(next_line_num,next_col_num); msg_tail(": inline comment"); } #endif } /* Swap input buffers to get ready for new line. But throw away comment lines if do_list is false, so error messages will work right. */ if(do_list || ! curr_comment_line) { char *temp=line; line = prev_line; prev_line=temp; if(! curr_comment_line) prev_stmt_line_num = line_num; prev_line_num = next_line_num; prev_line_is_printed = line_is_printed; } ++next_line_num; line_is_printed = FALSE; if( getstrn(line,MAXLINE+1,input_fd) == NULL ) { next_char = EOF; line_is_printed = TRUE; return; } /* Keep track of prior-comment-line situation */ prev_comment_line = curr_comment_line; } while( (curr_comment_line = is_comment(line)) != FALSE); ++noncomment_line_count; /* Handle continuation lines */ if( (next_index = is_continuation(line)) != 0) { /* It is a continuation */ if(eol_is_space) { next_char = EOL; next_col_num = 6; } else { next_char = line[++next_index]; next_col_num = 7; eol_skip = TRUE; /* skip continued leading space */ } /* Issue warnings if contin in funny places */ if(noncomment_line_count == 1) warning(next_line_num,6, "Continuation mark found in first statement of file"); if( prev_comment_line ) warning(next_line_num,6, "Continuation follows comment or blank line"); } else { /* It is not a continuation */ next_char = EOS; next_col_num = 0; next_index = -1; } }/*end while( end of line reached )*/ /* Avoid letting a '0' in column 6 become a token */ if(next_col_num == 6 && next_char == '0') next_char = ' '; /* elide EOL and following space of continued stmts if requested */ eol_skip = (eol_skip && isspace(next_char)); }while(eol_skip);/*end do*/ }/* end advance */ /* Function which returns 0 if line is not a comment, 1 if it is. * Comment is ANSI standard: C or c or * in column 1, or blank line. */ PRIVATE int is_comment(s) char s[]; { int i,c= makeupper(s[0]); if( c == 'C' || c == '*' ) return TRUE; for(i=0; s[i] != '\0'; i++) if( !isspace(s[i])) #ifdef INLINE_COMMENT_CHAR if(s[i]==INLINE_COMMENT_CHAR) { if(f77_standard) { int j,col; for(j=0,col=1; j<i; j++) /* compute col num */ if(s[j] == '\t') col = nxttab[col]; else col++; nonstandard(next_line_num,col); msg_tail(": inline comment"); } return TRUE; } else return FALSE; #else return FALSE; #endif return TRUE; /* blank line */ } /* Function which returns 0 if line is a not continuation line. * If line is a continuation, returns index in line of * the continuation mark. */ PRIVATE int is_continuation(s) char s[]; { int col,i,c; /* skip to col 6 */ for(i=0,col=1; col < 6 && s[i] != '\0'; i++) { if(s[i] == '\t') col = nxttab[col]; else col++; } c = s[i]; if ( col == 6 && c != '\0' && !isspace(c) && c != '0') return i; else return 0; } int flush_line_out(n) /* Prints lines up to line #n if not yet printed */ unsigned n; /* Returns TRUE if line was printed, else FALSE */ { /* Print previous line only if do_list TRUE */ if( !prev_line_is_printed && ((n == prev_line_num) || (n > prev_line_num && do_list)) ) { print_a_line(list_fd,prev_line,prev_line_num); prev_line_is_printed = TRUE; } if(n >= next_line_num && !line_is_printed) { print_a_line(list_fd,line,next_line_num); line_is_printed = TRUE; } return ( do_list || (prev_line_is_printed && n == prev_line_num) || (line_is_printed && n == next_line_num) ); } /* Function to read n-1 characters, or up to newline, whichever * comes first. Differs from fgets in that the newline is replaced * by null, and characters up to newline (if any) past the n-1st * are read and thrown away. * Returns NULL when end-of-file or error is encountered. */ PRIVATE char * getstrn(s,n,fd) char s[]; int n; FILE *fd; { int i=0,c; while( (c=getc(fd)) != '\n' ) { if(c == EOF) return NULL; if(i < n-1) s[i++] = c; } s[i] = '\0'; return s; } /* Function which looks ahead as far as end of line to see if input cursor is sitting at start of a token of the given class. */ /* N.B. right now only looks for edit descriptor or relop or complex constant */ int looking_at(token_class) int token_class; { int index; if( eol_is_space && line_num != next_line_num ) return FALSE; /* Looking at next line already */ switch(token_class) { #if 0/* This case is no longer used */ case tok_edit_descriptor: if( ! inside_format ) /* Gotta be inside a format spec */ return FALSE; index = next_index; /* Move past the E or D */ if( ! isdigit(line[index++]) ) return FALSE; /* Must start with w = integer */ while( isdigit(line[index]) ) { ++index; /* Scan over the w part */ } if( line[index++] != '.' ) return FALSE; /* Now must have decimal point */ if( ! isdigit(line[index++]) ) return FALSE; /* Must now have d = integer */ break; #endif case tok_relop: /* called with curr_char == '.' */ if( !isaletter( line[next_index] ) ) /* next char must be letter */ return FALSE; if( makeupper( line[next_index] ) == 'D' ) /* D.P. exponent */ return FALSE; /* if next char is any other letter but 'E', cannot be exponent. If 'E', must be EQ to be relop */ if( makeupper( line[next_index] ) == 'E' && makeupper( line[next_index+1] ) != 'Q' ) return FALSE; break; case tok_complex_const: index = next_index; if( (index = see_a_number(line,index)) < 0 ) return FALSE; while(line[index] != '\0' && isspace(line[index])) index++; if( line[index] != ',' ) return FALSE; ++index; if( (index = see_a_number(line,index)) < 0 ) return FALSE; while(line[index] != '\0' && isspace(line[index])) index++; if(line[index] != ')') return FALSE; break; default: return FALSE; } return TRUE; /* passed all the tests */ } /* see_a_number returns -1 if there is no valid numeric constant in string s starting at index i. If valid number found, it returns the index of the next character after the constant. Leading whitespace in s is skipped.*/ #ifdef BLANKS_IN_NUMBERS #define SKIP_SPACE while(s[i] != '\0' && isspace(s[i])) i++ #else #define SKIP_SPACE #endif PRIVATE int see_a_number(s,i) char s[]; int i; { int isave = i,j; int digit_seen = FALSE; while(s[i] != '\0' && isspace(s[i])) i++; /* move past optional preceding sign */ if(s[i] == '-' || s[i] == '+' ) { i++; SKIP_SPACE; } /* move past ddd or ddd. or .ddd or ddd.ddd */ if(isdigit(s[i])) digit_seen = TRUE; while(isdigit(s[i])) { i++; SKIP_SPACE; } if(s[i] == '.') { i++; SKIP_SPACE; if(isdigit(s[i])) digit_seen = TRUE; while(isdigit(s[i])) { i++; SKIP_SPACE; } } /* no digits seen: bail out now */ if(! digit_seen) return -1; /* look for exponential part. The standard does not allow D, but we will, just in case. */ if(makeupper(s[i]) == 'E' || makeupper(s[i]) == 'D') { i++; if(s[i] == '+' || s[i] == '-') i++; if(!isdigit(s[i])) return -1; while(isdigit(s[i])) i++; } if(debug_latest) {fprintf(list_fd,"\nsee_a_number: "); for(j=isave; j<i; j++) printf("%c",s[j]);} return i; } PRIVATE int is_overlength(s) /* checks line for having nonblanks past col 72 */ char *s; { int i,col; for(col=1,i=0; s[i] != '\0'; i++) { if(col > 72 && !isspace(s[i])) return TRUE; /* Count columns taking tabs into consideration */ if(s[i] == '\t') col = nxttab[col]; else ++col; } return FALSE; } /* End of module Advance */ /* III. Keywords */ /* keywords.c: Determines (to the best of its current ability) whether a given identifier is a keyword or not. Keywords may be used as variable names subject to the following limitations (see forchek.doc for explicit list): Use freely: any keyword with IK | NP flags any keyword with TY flag (data type names) FUNCTION TO Use as scalar variables only (not array, and not char if substring referenced): any keyword with IK flag Reserved: all others (this is now the empty set) */ #define IK 01 /* initial keyword of a statement */ #define NP 02 /* not followed by ( or = if initial */ #define MP 04 /* must be followed by ( */ #define NI 010 /* disallowed in logical IF */ #define EK 020 /* cannot be followed by another keyword */ #define TY 040 /* data type name */ #define EMPTY 256 struct { char *name; int class, context; } keywords[]={ {"ASSIGN", tok_ASSIGN, IK | NP | EK}, {"ACCEPT", tok_ACCEPT, IK | EK}, {"BACKSPACE", tok_BACKSPACE, IK | EK}, {"BLOCK", tok_BLOCK, IK | NP | NI}, {"CALL", tok_CALL, IK | NP | EK}, {"CHARACTER", tok_CHARACTER, IK | NI | EK | TY}, {"CLOSE", tok_CLOSE, IK | EK | MP}, {"COMMON", tok_COMMON, IK | NP | NI | EK}, {"COMPLEX", tok_COMPLEX, IK | NI | EK | TY}, {"CONTINUE", tok_CONTINUE, IK | NP | EK}, {"DATA", tok_DATA, IK | NI | EK}, {"DIMENSION", tok_DIMENSION, IK | NP | NI | EK}, {"DO", tok_DO, IK | NP | NI}, {"DOUBLE", tok_DOUBLE, IK | NP | NI}, {"DOWHILE", tok_DOWHILE, IK | NI | EK}, {"ELSE", tok_ELSE, IK | NP | NI}, {"ELSEIF", tok_ELSEIF, IK | NI | EK}, {"END", tok_END, IK | NP | NI}, {"ENDDO", tok_ENDDO, IK | NP | NI | EK}, {"ENDFILE", tok_ENDFILE, IK | EK}, {"ENDIF", tok_ENDIF, IK | NP | NI | EK}, {"ENTRY", tok_ENTRY, IK | NP | NI | EK}, {"EQUIVALENCE", tok_EQUIVALENCE,IK | NI | EK | MP}, {"EXTERNAL", tok_EXTERNAL, IK | NP | NI | EK}, {"FORMAT", tok_FORMAT, IK | NI | EK | MP}, {"FUNCTION", tok_FUNCTION, NP | NI | EK}, {"GOTO", tok_GOTO, IK | EK}, {"GO", tok_GO, IK | NP}, {"IF", tok_IF, IK | NI | EK}, {"IMPLICIT", tok_IMPLICIT, IK | NP | NI}, {"INCLUDE", tok_INCLUDE, IK | NP | NI | EK}, {"INQUIRE", tok_INQUIRE, IK | EK}, {"INTEGER", tok_INTEGER, IK | NI | EK | TY}, {"INTRINSIC", tok_INTRINSIC, IK | NP | NI | EK}, {"LOGICAL", tok_LOGICAL, IK | NI | EK | TY}, {"OPEN", tok_OPEN, IK | EK | MP}, {"PARAMETER", tok_PARAMETER, IK | NI | EK | MP}, {"PAUSE", tok_PAUSE, IK | NP | EK}, {"PRECISION", tok_PRECISION, IK | NI | EK | TY}, {"PRINT", tok_PRINT, IK | EK}, {"PROGRAM", tok_PROGRAM, IK | NP | NI | EK}, {"READ", tok_READ, IK | EK}, {"REAL", tok_REAL, IK | NI | EK | TY}, {"RETURN", tok_RETURN, IK | EK}, {"REWIND", tok_REWIND, IK | EK}, {"SAVE", tok_SAVE, IK | NP | NI | EK}, {"STOP", tok_STOP, IK | NP | EK}, {"SUBROUTINE", tok_SUBROUTINE, IK | NP | NI | EK}, {"TO", tok_TO, NI | EK}, {"THEN", tok_THEN, IK | NP | EK}, {"TYPE", tok_TYPE, IK | EK}, {"WHILE", tok_WHILE, IK | NI | EK}, {"WRITE", tok_WRITE, IK | EK | MP}, {NULL,0,0}, }; /* Macro to test if all the specified bits are set */ #define MATCH(Context) ((keywords[i].context & (Context)) == (Context)) /* Returns keyword token class or 0 if not a keyword. This version is able to handle those keywords which can only occur at the start of a statement and are never followed by ( or = so that they can be used as variables. */ #ifdef KEYHASHSZ int keyhashtab[KEYHASHSZ]; #else int keyhashtab[1000]; #endif /* Start of is_keyword */ int is_keyword(s) char *s; { unsigned h = kwd_hash(s) % KEYHASHSZ, ans = FALSE, i = keyhashtab[h]; if( i != EMPTY && strcmp(keywords[i].name,s) == 0) { while(iswhitespace(curr_char)) /* Move to lookahead char */ advance(); if(debug_lexer){ fprintf(list_fd, "\nkeyword %s: initialflag=%d ",keywords[i].name,initial_flag); fprintf(list_fd, "context=%o, next char=%c %o",keywords[i].context, curr_char,curr_char); } if( !initial_flag && MATCH(IK) ) { /* Dispose of names which can only occur in initial part of statement, if found elsewhere. */ ans = FALSE; } else if( MATCH(IK|NP) ) { /* Here we disambiguate keywords found in initial part of statement: those which can only occur in initial position and never followed by '(' or '=' */ if( (curr_char != '(') && (curr_char != '=') ) { ans = TRUE; } else { ans = FALSE; } } else if( MATCH(TY) ){ /* Handle data type names. */ if(keywords[i].class == tok_PRECISION) { ans = (prev_token_class == tok_DOUBLE); } else { if( implicit_flag ) ans = TRUE; else ans = (initial_flag && (curr_char != '(') && (curr_char != '=') ); } } else if(keywords[i].class == tok_FUNCTION) { /* FUNCTION is handled as a special case. It must always be followed by a letter (variable never can) */ ans = (isaletter(curr_char)); } else if(keywords[i].class == tok_TO) { /* TO is another special case. Either must follow GO recognized previously or be followed by a variable name (in ASSIGN statement). */ if(prev_token_class == tok_GO) ans = TRUE; else ans = ( isaletter(curr_char) ); } else if( MATCH(IK) ) { /* Handle keywords which must be in initial position, when found in initial position. For the present, these are semi-reserved: if used for variables, must be scalar variables. Then if used as variable must be followed by '=' */ ans = ( curr_char != '=' ); } else{ /* For now, other keywords are reserved. */ ans = TRUE; } } /* end if(strcmp...) */ /* Save initial token class for use by parser. Either set it to keyword token or to id for assignment stmt. */ if(initial_flag) { curr_stmt_class = (ans? keywords[i].class: tok_identifier); } /* Turn off the initial-keyword flag if this is a keyword that cannot be followed by another keyword or if it is not a keyword. */ if(ans) { if(keywords[i].context & EK) initial_flag = FALSE; return keywords[i].class; } else { initial_flag = FALSE; return 0; /* Not found in list */ } } /* End of is_keyword */ /* init_keyhashtab.c: Initializes the keyword hash table by clearing it to EMPTY and then hashes all the keywords into the table. */ void init_keyhashtab() { unsigned i,h; for(i=0;i<KEYHASHSZ;i++) { keyhashtab[i] = EMPTY; } for(i=0; keywords[i].name != NULL; i++) { h = kwd_hash(keywords[i].name) % KEYHASHSZ; if( keyhashtab[h] == EMPTY ) { keyhashtab[h] = i; } else { /* If there is a clash, there is a bug */ #ifdef KEYHASHSZ fprintf(stderr,"Oops-- Keyword hash clash at %s, %s\n", keywords[i].name, keywords[keyhashtab[h]].name); exit(1); #else ++numclashes; /* for use in finding right key hash size */ #endif } } }